package com.zhang.spark_1.spark_core.operator.transform

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * @title: aggregateByKey - compute the average value for each key
 * @author: zhang
 * @date: 2021/12/5 18:51
 */
object Spark17_RDD_Operator_Transform_test {

  def main(args: Array[String]): Unit = {
    // Create the Spark connection (local mode)
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("operator")
    val sc: SparkContext = new SparkContext(conf)
    //TODO aggregateByKey
    // Source data: (key, value) pairs distributed across 2 partitions
    val rdd: RDD[(String, Int)] = sc.makeRDD(List(
      ("a", 1), ("a", 2), ("b", 3),
      ("b", 4), ("b", 5), ("a", 6)
    ), 2)
    // Compute the average value for each key => (a,3) (b,4)
    // Zero value is (sum, count); the first function folds a value into the
    // accumulator within a partition, the second merges accumulators across partitions.
    val newRDD: RDD[(String, (Int, Int))] = rdd.aggregateByKey((0, 0))(
      (t, v) => (t._1 + v, t._2 + 1),            // seqOp: add the value, increment the count
      (t1, t2) => (t1._1 + t2._1, t1._2 + t2._2) // combOp: sum the sums and the counts
    )
    // (a,(9,3)) => (a,3): divide the sum by the count
    newRDD.mapValues(t => t._1 / t._2).collect().foreach(println)
    // Equivalent version using pattern matching on the (sum, count) pair:
    /*newRDD.mapValues {
      case (num, count) => num / count
    }.collect().foreach(println)*/
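
    // Note: the division above is integer division; with these inputs both
    // averages are whole numbers, so nothing is truncated. A minimal sketch of
    // a fractional-average variant (an assumption, not part of the original example):
    /*newRDD.mapValues {
      case (sum, count) => sum.toDouble / count
    }.collect().foreach(println)*/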

    sc.stop()

  }
}
